/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2005 by Myricom, Inc.  All rights reserved.                 *
 *************************************************************************/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <time.h>

#include "mx_auto_config.h"
#include "myriexpress.h"
#include "mxsmpi.h"
#include "mx_timing.h"


/* Number of timed outer iterations; one sample per iteration is recorded. */
#define ITER 8
/* Number of send/receive exchanges performed inside each timed iteration. */
#define INNER_ITER 1

/* Size in bytes of the send and receive buffers (one segment each). */
#define LENGTH 10000

/*
 * Raw cycle-counter samples for each outer iteration i:
 *   timings[i][0] - taken at the start of the iteration,
 *   timings[i][1] - taken after all inner exchanges have completed,
 *   timings[i][2] - taken right after the first completed request was peeked
 *                   (overwritten by each inner exchange).
 */
mx_cycles_t timings[ITER][3];

/*
 * Per-iteration elapsed time in seconds, divided by 2*INNER_ITER.
 * main() sorts this array in ascending order before reporting.
 */
double times[ITER];

/*
 * For each outer iteration, non-zero when the first request reported by
 * mx_ipeek() was the receive request rather than the send request.
 */
int first_is_recv[ITER];


/*
 * qsort()-compatible comparator for doubles (ascending order).
 *
 * Returns 1 when *ap is greater than *bp, 0 when they compare equal,
 * and -1 in every other case.
 */
static int
double_cmp(const void *ap, const void *bp)
{
  const double lhs = *(const double *)ap;
  const double rhs = *(const double *)bp;

  if (lhs > rhs) {
    return 1;
  }
  if (lhs == rhs) {
    return 0;
  }
  return -1;
}

/*
 * Build a 64-bit value with `a` shifted into the upper 32 bits and `b`
 * OR'd into the low bits. Both arguments are widened to uint64_t first.
 */
#define MAKE_TAG(a,b) ((uint64_t)(a)<<32 | (uint64_t)(b))
/*
 * Application tag used as match information for mx_irecv()/mx_isend():
 * same as MAKE_TAG but with 0x8432 added to `a` before packing.
 * NOTE(review): 0x8432 looks like an application-specific tag base that
 * keeps these tags apart from other traffic -- confirm.
 */
#define MAKE_TAG_APP(a,b) MAKE_TAG(0x8432+(a),(b))

/*
 * Ring exchange bandwidth benchmark.
 *
 * Every rank posts a receive tagged with its own rank and sends a
 * LENGTH-byte message to the next rank (the last rank wraps around to
 * rank 0).  The exchange is repeated INNER_ITER times inside each of the
 * ITER timed iterations.  Rank 0 prints its own best/worst/average/median
 * figures and then the figures reduced over all ranks.
 *
 * Returns 0 on completion.  Internal consistency is checked with assert().
 */
int main(int argc, char **argv)
{
  mx_endpoint_t ep;
  mx_segment_t send_seg;
  mx_segment_t recv_seg;
  mx_status_t s1, s2;
  mx_request_t send_req;
  mx_request_t recv_req;
  uint32_t result;
  char *sbuf, *rbuf;
  int i;
  int peer;
  struct mxsmpi_peer *nodes;
  int myrank, nprocs;
  double avg;
  /* Initialised so that no rank ever holds an indeterminate value. */
  double gbest = 0.0, gworst = 0.0, gavg = 0.0;

  mx_cycles_counter_init();
  MXSMPI_Init(&argc, &argv);
  ep = MXSMPI_COMM_WORLD->ep;
  MXSMPI_Comm_rank(MXSMPI_COMM_WORLD, &myrank);
  MXSMPI_Comm_size(MXSMPI_COMM_WORLD, &nprocs);
  nodes = MXSMPI_COMM_WORLD->peers;

  sbuf = malloc(LENGTH);
  rbuf = malloc(LENGTH);
  assert(sbuf && rbuf);
  recv_seg.segment_ptr = rbuf;
  recv_seg.segment_length = LENGTH;
  send_seg.segment_ptr = sbuf;
  send_seg.segment_length = LENGTH;

  /* Destination is the next rank in the ring; loop-invariant. */
  peer = (myrank == nprocs - 1) ? 0 : myrank + 1;

  MXSMPI_Barrier(MXSMPI_COMM_WORLD);
  for (i = 0; i < ITER; i++) {
    int j;
    mx_request_t peek1, peek2;

    timings[i][0] = mx_get_cycles();
    for (j = 0; j < INNER_ITER; j++) {
      /* The receive is tagged with our rank, the send with the peer's rank,
       * so our send matches the receive posted by the peer. */
      mx_irecv(ep, &recv_seg, 1, MAKE_TAG_APP(j, myrank), MX_MATCH_MASK_NONE, NULL, &recv_req);
      mx_isend(ep, &send_seg, 1, nodes[peer].addr, MAKE_TAG_APP(j, peer), NULL, &send_req);

      /* Spin until the first of the two requests is reported. */
      do {
        mx_ipeek(ep, &peek1, &result);
      } while (result == 0);
      assert(result == 1);
      assert(peek1 == recv_req || peek1 == send_req);
      first_is_recv[i] = (peek1 == recv_req);
      timings[i][2] = mx_get_cycles();
      mx_test(ep, &peek1, &s1, &result);
      assert(result == 1);

      /* Spin until the remaining request is reported. */
      do {
        mx_ipeek(ep, &peek2, &result);
      } while (result == 0);
      assert(result == 1);
      assert(peek2 == recv_req || peek2 == send_req);
      mx_test(ep, &peek2, &s2, &result);
      assert(result == 1);
    }
    timings[i][1] = mx_get_cycles();
  }

  for (i = 0; i < ITER; i++) {
    times[i] = (timings[i][1] - timings[i][0]) * mx_seconds_per_cycle();
    /* NOTE(review): the factor 2 presumably accounts for one message sent
     * and one received per exchange -- confirm. */
    times[i] /= 2 * INNER_ITER;
#if 0
    {
      /* Time between the first completion and the end of the iteration. */
      double sr_delay = (timings[i][1] - timings[i][2]) * mx_seconds_per_cycle();
      printf("%d,iter=%d: %g  (time=%g us, s/r-delay=%g us, first=%s)\n",
             myrank, i, LENGTH/times[i]/1e6,
             times[i]*1e6, sr_delay*1e6,
             first_is_recv[i] ? "recv" : "send");
    }
#endif
  }

  /* Average over the whole run, from the first start to the last end. */
  avg = (timings[ITER-1][1] - timings[0][0]) * mx_seconds_per_cycle() / (2*INNER_ITER) / ITER;

  /* Ascending sort: times[0] is the fastest sample, times[ITER-1] the slowest. */
  qsort(times, ITER, sizeof(times[0]), double_cmp);
  if (myrank == 0) {
    printf("rank-0: best,worst,avg, median\n%g MB/s\t%gMB/s\t%gMB/s\t%gMB/s\n",
           LENGTH/times[0]/1e6, LENGTH/times[ITER-1]/1e6, LENGTH/avg/1e6, LENGTH/times[ITER/2]/1e6);
  }

  MXSMPI_Reduce(&times[0], &gbest, 1, MXSMPI_DOUBLE, MXSMPI_MIN, 0, MXSMPI_COMM_WORLD);
  MXSMPI_Reduce(&times[ITER-1], &gworst, 1, MXSMPI_DOUBLE, MXSMPI_MAX, 0, MXSMPI_COMM_WORLD);
  MXSMPI_Reduce(&avg, &gavg, 1, MXSMPI_DOUBLE, MXSMPI_SUM, 0, MXSMPI_COMM_WORLD);
  if (myrank == 0) {
    /* The reduced results are only consumed on the root (rank 0), so the
     * sum is turned into a mean here rather than on every rank. */
    gavg /= nprocs;
    printf("Global: best,worst,avg\n%g MB/s\t%gMB/s\t%gMB/s\n", LENGTH/gbest/1e6, LENGTH/gworst/1e6, LENGTH/gavg/1e6);
  }
  MXSMPI_Finalize();

  /* Both requests of every exchange were completed above, so the buffers
   * are no longer referenced. */
  free(sbuf);
  free(rbuf);

  return 0;
}
